suppressPackageStartupMessages({
library(slingshot)
library(SingleCellExperiment)
library(ggplot2)
})
library(transfactor)
## Loading required package: glmnet
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
## Loaded glmnet 4.1-4
## Loading required package: fastmatch
## Loading required package: arrayhelpers
## Package arrayhelpers, version 1.1-0
##
## If you use this package please cite it appropriately.
## citation("arrayhelpers")
## will give you the correct reference.
##
## The project homepage is http://arrayhelpers.r-forge.r-project.org/
##
## Attaching package: 'arrayhelpers'
## The following object is masked from 'package:IRanges':
##
## slice
## Loading required package: edgeR
## Loading required package: limma
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
##
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
##
## cpm
## Loading required package: tibble
dataDir <- "~/Dropbox/research/brainStat/hbcRegenGithub/data/"
sds <- readRDS(paste0(dataDir, "finalTrajectory/sling.rds"))
counts <- readRDS(paste0(dataDir, "finalTrajectory/counts_noResp_noMV.rds"))
counts <- round(counts)
countsAll <- counts
# sce <- readRDS(paste0(dataDir, "finalTrajectory/sce_tradeSeq20200904.rds"))
load(paste0(dataDir, "/ALL_TF.Rda"))
tf <- intersect(ALL_TF, rownames(counts))
clDatta <- readRDS(paste0(dataDir, "finalTrajectory/dattaCl_noResp_noMV.rds"))
## get neuronal cells
RNGversion("3.5.0")
## Warning in RNGkind("Mersenne-Twister", "Inversion", "Rounding"): non-uniform
## 'Rounding' sampler used
set.seed(11)
cw <- slingCurveWeights(sds)
pt <- slingPseudotime(sds, na=FALSE)
wSamp <- tradeSeq:::.assignCells(cw)
neurCells <- which(wSamp[,1] == 1)
pt1 <- pt[neurCells, 1]
oot1 <- order(pt1, decreasing=FALSE)
pt1Groups <- Hmisc::cut2(pt1, g=20)
U <- model.matrix(~ -1 + pt1Groups)
ct1 <- clDatta[neurCells][oot1]
cellTypeBinCounts <- sapply(levels(pt1Groups), function(gg){
table(ct1[which(pt1Groups == gg)])
})
majorCellTypeBin <- factor(c(rep("HBC*", 5), rep("GBC", 3),
rep("iOSN", 10), rep("mOSN", 2)),
levels = c("HBC*", "GBC", "iOSN", "mOSN"))
counts <- counts[,neurCells][,oot1]
# set anaconda in path
Sys.setenv(PATH=paste0('/Users/koenvandenberge/opt/anaconda3/bin:', Sys.getenv('PATH')))
# use anaconda env in reticulate
library(reticulate)
#use_condaenv("r-reticulate")
#p1 <- "/Users/koenvandenberge/opt/anaconda3/bin/python3"
p2 <- "/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8"
#p3 <- "/Users/koenvandenberge/opt/anaconda3/bin/python3"
reticulate::use_python(p2)
library(reticulate)
## new Mac
# p1 <- "/usr/bin/python"
p2 <- "/Users/koenvandenberge/opt/anaconda3/bin/python3"
reticulate::use_python(p2)
import pickle
import pandas as pd
import ctxcore
import pyscenic
from ctxcore import genesig
import sys
sys.modules['pyscenic.genesig']=genesig
file="/Users/koenvandenberge/Dropbox/research/GRN/evaluateGRN/oe10x/pySCENIC_results/output2_prune.dat"
with open(file, "rb") as f:
regulons = pickle.load(f)
dfList = list()
tfNames = list()
for ii in range(len(regulons)):
tf = regulons[ii].name
tfNames.append(tf)
geneWeights = regulons[ii].gene2weight
df = pd.DataFrame.from_dict(geneWeights, orient='index')
dfList.append(df)
tfNames <- py$tfNames
tfNames <- gsub(x=tfNames, pattern="(+)", fixed=TRUE, replacement="")
targetList <- py$dfList
names(targetList) <- tfNames
allTargetGenes <- unique(unlist(Reduce(c, lapply(targetList, rownames))))
length(allTargetGenes)
tfRep <- unlist(mapply(rep, tfNames, unlist(lapply(targetList, nrow))))
targetRep <- unlist(lapply(targetList, rownames))
strengths <- unlist(lapply(targetList, function(x) x[,1]))
alpha <- matrix(0, nrow = length(allTargetGenes), ncol=length(tfNames),
dimnames = list(allTargetGenes, tfNames))
alpha[cbind(targetRep, tfRep)] <- strengths
X <- alpha
X[X != 0] <- 1
saveRDS(X, file="../data/oeGRN_processed.rds")
X <- readRDS("../data/oeGRN_processed.rds")
## filter counts
counts <- counts[rownames(counts) %in% rownames(X),]
par(mfrow=c(1,2))
barplot(table(rowSums(X)), main="By how many TFs is a gene regulated?")
barplot(table(colSums(X)), main="How many genes is a TF regulating?")
dim(X)
## [1] 7863 262
poisLassoRes <- transfactor::estimateActivity(counts=as.matrix(counts),
X=X,
model="poisson",
U=U,
maxIter=1500,
plot=TRUE,
verbose=TRUE,
epsilon=1/2,
sparse=TRUE,
repressions=FALSE)
date <- Sys.Date()
saveRDS(poisLassoRes, file=paste0("../data/poisRes_", date,".rds"))
### the function used for reproducibility reasons provided here
tfCounts <- function (mu_gtc = "matrix", counts = "matrix", ...)
{
.local <- function (mu_gtc, counts, design = NULL)
{
if (is.null(design)) {
message("No design matrix provided. Working with intercept only.")
ict <- rep(1, length = ncol(counts))
design <- stats::model.matrix(~-1 + ict)
}
if (nrow(design) != ncol(counts)) {
stop("Dimensions of design matrix and count matrix don't match.")
}
tfRows <- unlist(lapply(strsplit(rownames(mu_gtc), split = ";"),
"[[", 1))
geneRows <- unlist(lapply(strsplit(rownames(mu_gtc),
split = ";"), "[[", 2))
tfUniq <- unique(tfRows)
geneUniq <- unique(geneRows)
lvl <- unlist(apply(design, 1, function(row) {
which(row == 1)
}))
colnames(mu_gtc) <- paste0("ct", 1:ncol(mu_gtc))
mu_gtc_tibble <- suppressWarnings(tibble::as_tibble(mu_gtc))
Y_ti <- matrix(0, nrow = length(tfUniq), ncol = ncol(counts),
dimnames = list(tfUniq, colnames(counts)))
for (gg in 1:length(geneUniq)) {
curGene <- geneUniq[gg]
id <- which(geneRows == curGene)
curTFs <- tfRows[id]
curMu <- as.matrix(mu_gtc_tibble[id, 1:ncol(design)])
curProbs <- sweep(curMu, 2, colSums(curMu) + 1e-10,
"/")
curProbs <- curProbs[, lvl, drop = FALSE]
Y_ti[curTFs, ] <- Y_ti[curTFs, ] + sweep(curProbs,
2, counts[curGene, ], "*")
}
return(Y_ti)
}
}
poisLassoRes <- readRDS("../data/poisRes_2023-06-21.rds")
Y_ti_poisson <- transfactor::tfCounts(mu_gtc=poisLassoRes$mu_gtc, counts=as.matrix(counts), design=U)
Y_tc_poisson <- Y_ti_poisson %*% U
library(pheatmap)
Yhat_tc_poisson <- Y_tc_poisson %*% diag(1/colSums(U))
colnames(Yhat_tc_poisson) <- colnames(U)
## heatmap on most variable TFs using activity counts
ooVar <- order(rowVars(Yhat_tc_poisson), decreasing=TRUE)
top20TFs <- rownames(Yhat_tc_poisson)[ooVar[1:20]]
top20TFs
## [1] "Hdac2" "Fos" "Rfx3" "Sox11" "Ezh2" "Egr1" "Cebpg" "Junb"
## [9] "E2f1" "Hes6" "Xbp1" "Rcor1" "Zbtb7b" "Atf5" "Kdm5a" "Srebf1"
## [17] "Foxa1" "Jund" "Creb3" "Eomes"
pheatmap(t(scale(t(Yhat_tc_poisson[ooVar[1:20],]))), cluster_cols = FALSE)
# pdf("../plots/highVarTF_poissonLasso.pdf")
# pheatmap(t(scale(t(Yhat_tc_poisson[ooVar[1:20],]))), cluster_cols = FALSE)
# dev.off()
Yhat_tc_poisson <- Yhat_tc_poisson[rowSums(Yhat_tc_poisson) > 0,]
Yhat_tc_poisson <- Yhat_tc_poisson[rowVars(Yhat_tc_poisson) > 0,]
yhatScaled <- t(scale(t(Yhat_tc_poisson)))
## heatmap on most variable TFs using scaled activity
pheatmap(yhatScaled, cluster_cols = FALSE, show_colnames = FALSE)
# pdf("../plots/allTF_poissonLasso.pdf", height=18)
# pheatmap(yhatScaled, cluster_cols = FALSE,
# show_colnames = FALSE, show_rownames = TRUE,
# border_color = NA)
# dev.off()
# pdf("../plots/allTF_poissonLasso_noNames.pdf", height=10)
pheatmap(yhatScaled, cluster_cols = FALSE,
show_colnames = FALSE, show_rownames = FALSE,
border_color = NA)
# dev.off()
ph <- pheatmap(yhatScaled, cluster_cols = FALSE,
show_colnames = FALSE, show_rownames = FALSE,
border_color = NA)
cl <- cutree(ph$tree_row, k = 3)
anro <- data.frame(cluster=factor(cl))
rownames(anro) <- ph$tree_row$labels
ancol <- data.frame(cellType=majorCellTypeBin)
rownames(ancol) <- colnames(yhatScaled)
# pdf("../plots/allTF_poissonLasso_noNames_annotated.pdf", height=10)
pheatmap(yhatScaled, cluster_cols = FALSE,
show_colnames = FALSE, show_rownames = FALSE,
border_color = NA, annotation_row = anro,
annotation_names_row = TRUE, annotation_legend = TRUE,
annotation_col = ancol)
# dev.off()
## annotate top 20 TFs in big heatmap
phpaper <- pheatmap(yhatScaled, cluster_cols = FALSE,
show_colnames = FALSE, show_rownames = FALSE,
border_color = NA, annotation_row = anro,
annotation_names_row = TRUE, annotation_legend = TRUE,
annotation_col = ancol)
# Gene enrichment on TFs doesn't provide any results.
write.table(colnames(X), file="../data/tfUniverseOE10X.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
# cl 1 is HBC
write.table(names(cl)[cl==1], file="../data/tfCl1_oe10x.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
# neur
write.table(names(cl)[cl==2], file="../data/tfCl2_oe10x.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
# GBC
write.table(names(cl)[cl==3], file="../data/tfCl3_oe10x.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
# Gene enrichment on genes regulated by the TFs
getPi_gtc_sufStats <- function(mu_gtc, counts, pt=NULL, qSteps=0.01, U=NULL){
require(fastmatch)
if(!is.null(U)){
glm <- TRUE
gam <- FALSE
}
if(!is.null(pt)){
glm <- FALSE
gam <- TRUE
}
if(!is.null(pt)){
ptGroups <- Hmisc::cut2(pt, cuts = quantile(pt, prob=seq(0,1,by=qSteps)))
Xpt <- model.matrix(~0+ptGroups)
design <- Xpt
}
if(glm){
lvl <- unlist(apply(U,1, function(row){
which(row == 1)
}))
design <- U
}
tfRows <- unlist(lapply(strsplit(rownames(mu_gtc), split=";"), "[[", 1))
geneRows <- unlist(lapply(strsplit(rownames(mu_gtc), split=";"), "[[", 2))
tfUniq <- unique(tfRows)
geneUniq <- unique(geneRows)
rn <- rownames(mu_gtc)
pi_gtc <- list()
for(gg in 1:length(geneUniq)){
curGene <- geneUniq[gg]
id <- which(geneRows == curGene)
curTFs <- tfRows[id]
rowSel <- fastmatch::fmatch(paste0(curTFs,";",curGene), rn)
curMu <- mu_gtc[rowSel,,drop=FALSE]
curProbs <- sweep(curMu, 2, colSums(curMu)+1e-10, "/")
pi_gtc[[gg]] <- curProbs
}
names(pi_gtc) <- geneUniq
return(pi_gtc)
}
pi_gtc <- getPi_gtc_sufStats(mu_gtc = poisLassoRes$mu_gtc,
counts = as.matrix(counts),
pt = NULL,
qSteps = NULL,
U = U)
pi_gtc <- do.call(rbind, pi_gtc)
getGenesAssociatedWithTF <- function(tf, pi_gtc, X){
# get all genes regulated by that TF
genes <- names(which(X[,tf] == 1))
geneListTF <- c()
# for each gene check if that TF has the major contribution
for(gg in 1:length(genes)){
curPi <- pi_gtc[grep(x=rownames(pi_gtc), paste0(";",genes[gg],"$")),,drop=FALSE]
if(all(curPi == 0)) next
maxTF <- apply(curPi,2, function(bin){
if(all(bin == 0)) return(NA)
which.max(bin)
})
if(paste0(tf,";",genes[gg]) %in% rownames(curPi)[maxTF]){
geneListTF <- c(geneListTF, genes[gg])
}
}
return(geneListTF)
}
tf1 <- names(cl[cl == 1])
tf2 <- names(cl[cl == 2])
tf3 <- names(cl[cl == 3])
genesCluster1 <- unique(unlist(sapply(tf1, function(x){
getGenesAssociatedWithTF(tf=x, pi_gtc=pi_gtc, X=X)
})))
genesCluster2 <- unique(unlist(sapply(tf2, function(x){
getGenesAssociatedWithTF(tf=x, pi_gtc=pi_gtc, X=X)
})))
genesCluster3 <- unique(unlist(sapply(tf3, function(x){
getGenesAssociatedWithTF(tf=x, pi_gtc=pi_gtc, X=X)
})))
write.table(rownames(counts), file="../data/genesAll.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
write.table(genesCluster1, file="../data/genesCl1_oe10x.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
write.table(genesCluster2, file="../data/genesCl2_oe10x.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
write.table(genesCluster3, file="../data/genesCl3_oe10x.txt",
row.names=FALSE, col.names=FALSE, quote=FALSE)
## results for BP
bpHBC <- read.csv("../data/gProfiler_HBC.csv")
bpGBC <- read.csv("../data/gProfiler_GBC.csv")
bpNeur <- read.csv("../data/gProfiler_neuronal.csv")
# xtable::xtable(bpHBC[1:20,2,drop=FALSE])
# xtable::xtable(bpGBC[1:20,2,drop=FALSE])
# xtable::xtable(bpNeur[1:20,2,drop=FALSE])
Note that an archived version of gProfiler is used as some GO annotations were missing in Ensembl 103, so we are using Ensembl 102. This version can be accessed using https://biit.cs.ut.ee/gprofiler_archive3/e102_eg49_p15/gost.
Results for cluster 1 (HBC TFs): https://biit.cs.ut.ee/gplink/l/MYQUL6K8Sb Results for cluster 2 (neuronal TFs): https://biit.cs.ut.ee/gplink/l/UYOKvLKVSe Results for cluster 3 (GBC TFs): https://biit.cs.ut.ee/gplink/l/P1Ea9pBuTb
library(slingshot)
library(tradeSeq)
pcCounts <- scater::calculatePCA(x = log1p(counts),
ncomponents = 10,
ntop = 1e3)
umapCounts <- uwot::umap(pcCounts, min_dist = 0.8)
dfUMAPCounts <- data.frame(UMAP1=umapCounts[,1],
UMAP2=umapCounts[,2],
cellType=droplevels(ct1))
set.seed(3)
cl <- kmeans(as.matrix(dfUMAPCounts[,1:2]), centers=4)
plot(as.matrix(dfUMAPCounts[,1:2]), pch=16, col= cl$cluster)
lin <- getLineages(data = as.matrix(dfUMAPCounts[,1:2]),
clusterLabels = cl$cluster,
start.clus = 3,
end.clus = 2)
plot(dfUMAPCounts[,1:2], col=dfUMAPCounts$cellType) ; slingshot:::lines.SlingshotDataSet(as.SlingshotDataSet(lin), col="black", lwd=2)
## as(<dsCMatrix>, "dgCMatrix") is deprecated since Matrix 1.5-0; do as(., "generalMatrix") instead
# plot(dfUMAPCounts[,1:2], col=dfUMAPCounts$cellType) ; lines(lin, col="black", lwd=2)
sds <- getCurves(lin, extend = 'n')
plot(dfUMAPCounts[,1:2], col=dfUMAPCounts$cellType) ; slingshot:::lines.SlingshotDataSet(as.SlingshotDataSet(sds), col="black", lwd=2)
# plot(dfUMAPCounts[,1:2], col=dfUMAPCounts$cellType) ; lines(sds, col="black", lwd=2)
sceGAMGene <- fitGAM(counts = as.matrix(counts),
sds = sds,
nknots = 5,
verbose = FALSE)
saveRDS(sceGAMGene, file="../data/sceGAMGene.rds")
# sceGAMGene <- readRDS("../data/sceGAMGene.rds")
sceGAMTF <- fitGAM(counts = Y_ti_poisson,
sds = sds,
nknots = 5,
verbose = FALSE)
saveRDS(sceGAMTF, file="../data/sceGAMTF.rds")
# sceGAMTF <- readRDS("../data/sceGAMTF.rds")
resGene <- associationTest(sceGAMGene)
resTF <- associationTest(sceGAMTF)
sum(p.adjust(resGene$pvalue, "fdr") <= 0.05, na.rm=TRUE) # 7345
## [1] 6977
sum(p.adjust(resTF$pvalue, "fdr") <= 0.05, na.rm=TRUE) # 216
## [1] 209
resGene_fc <- associationTest(sceGAMGene, l2fc = log2(2))
resTF_fc <- associationTest(sceGAMTF, l2fc = log2(2))
sum(p.adjust(resGene_fc$pvalue, "fdr") <= 0.05, na.rm=TRUE) # 5888
## [1] 4752
sum(p.adjust(resTF_fc$pvalue, "fdr") <= 0.05, na.rm=TRUE) # 144
## [1] 135
## for reproducibility
tfDistance <- function (activity = "list", X = "matrix", counts = "matrix",
U = "matrix", ...)
{
.local <- function (activity, X, counts, U, cellGroups = NULL,
distance = "Euclidean", scaleDistance = FALSE, contrast = "consecutive",
referenceGroup = NULL)
{
if (is.null(colnames(X))) {
colnames(X) <- paste0("tf", 1:ncol(X))
}
if (any(X == -1)) {
X[X == -1] <- 0
}
if (!is.null(cellGroups)) {
if (is.character(cellGroups)) {
cellGroups <- which(colnames(U) %in% cellGroups)
}
}
if (distance %in% c("Euclidean", "L1")) {
pi_gtc <- getPi_gtc_sufStats(activity$mu_gtc, counts = as.matrix(counts),
U = U)
mu_gc <- ((counts %*% U) %*% diag(1/colSums(U)))
pi_gtc_mat <- do.call(rbind, pi_gtc)
pi_tfNames <- unlist(lapply(strsplit(rownames(pi_gtc_mat),
split = ";"), "[[", 1))
pi_geneNames <- unlist(lapply(strsplit(rownames(pi_gtc_mat),
split = ";"), "[[", 2))
allTF <- colnames(X)
tfDist <- vector(length = length(allTF))
for (tt in seq_len(length(allTF))) {
curTF <- allTF[tt]
curTargets <- rownames(X)[which(X[, curTF] ==
1)]
rowSel <- fastmatch::fmatch(paste0(curTF, ";",
curTargets), rownames(pi_gtc_mat))
curPi <- pi_gtc_mat[rowSel, , drop = FALSE]
if (!is.null(cellGroups)) {
curDist <- .distanceCalculation_original(mu_gc = mu_gc[curTargets,
], pi_gtc = curPi, cellID = cellGroups, distance = distance,
scaleDistance = scaleDistance)
}
if (contrast == "consecutive") {
conDist <- c()
for (kk in seq_len(ncol(U) - 1)) {
conDist[kk] <- .distanceCalculation_original(mu_gc = mu_gc[curTargets,
, drop = FALSE], pi_gtc = curPi, cellID = c(kk,
kk + 1), distance = distance, scaleDistance = scaleDistance)
}
curDist <- sum(conDist)
}
else if (contrast == "reference") {
refDist <- c()
varsToCompare <- seq_len(ncol(U))[-referenceGroup]
for (kk in seq_len(length(varsToCompare))) {
refDist[kk] <- .distanceCalculation_original(mu_gc = mu_gc[curTargets,
], pi_gtc = curPi, cellID = c(referenceGroup,
varsToCompare[kk]), distance = distance,
scaleDistance = scaleDistance)
}
curDist <- sum(refDist)
}
tfDist[tt] <- curDist
}
names(tfDist) <- allTF
}
else if (distance %in% c("rank", "EuclideanTF", "L1TF")) {
if (!is.null(cellGroups)) {
curDist <- .distanceCalculation_newMethods(mu_tc = activity$mu_tc,
cellID = cellGroups, distance = distance, scaleDistance = scaleDistance)
return(curDist)
}
if (contrast == "consecutive") {
conDist <- matrix(NA, nrow = ncol(X), ncol = ncol(U) -
1)
for (kk in seq_len(ncol(U) - 1)) {
conDist[, kk] <- .distanceCalculation_newMethods(mu_tc = activity$mu_tc,
cellID = c(kk, kk + 1), distance = distance,
scaleDistance = scaleDistance)
}
tfDist <- rowSums(conDist)
names(tfDist) <- rownames(activity$mu_tc)
}
else if (contrast == "reference") {
varsToCompare <- seq_len(ncol(U))[-referenceGroup]
refDist <- matrix(NA, nrow = ncol(X), ncol = ncol(U) -
1)
for (kk in seq_len(length(varsToCompare))) {
refDist[, kk] <- .distanceCalculation_newMethods(mu_tc = activity$mu_tc,
cellID = c(referenceGroup, varsToCompare[kk]),
distance = distance, scaleDistance = scaleDistance)
}
tfDist <- rowSums(refDist)
names(tfDist) <- rownames(activity$mu_tc)
}
}
return(tfDist)
}
.local(activity, X, counts, U, ...)
}
tfDist_euclidConsec <- transfactor::tfDistance(activity = poisLassoRes,
X = X,
counts = counts,
U = U,
scaleDistance = FALSE,
contrast = "consecutive")
tfDist_euclidConsecScaled <- transfactor::tfDistance(activity = poisLassoRes,
X = X,
counts = counts,
U = U,
scaleDistance = TRUE,
contrast = "consecutive")
tfDist_l1Consec <- transfactor::tfDistance(activity = poisLassoRes,
X = X,
counts = counts,
U = U,
distance = "L1",
contrast = "consecutive",
scaleDistance = FALSE)
tfDist_l1ConsecScaled <- transfactor::tfDistance(activity = poisLassoRes,
X = X,
counts = counts,
U = U,
distance = "L1",
contrast = "consecutive",
scaleDistance = TRUE)
# Euclidean vs L1 unscaled distance: top rank is quite different.
plot(tfDist_euclidConsec+1, tfDist_l1Consec+1, log="xy")
# Euclidean vs L1 scaled distance
par(mfrow=c(1,2))
par(bty='l')
plot(tfDist_euclidConsecScaled+1, tfDist_l1ConsecScaled+1, log="xy",
xlab = "Euclidean scaled distance",
ylab = "L1 scaled distance")
## MD plot
par(bty='l')
plot(x = rowMeans(cbind(scale(tfDist_euclidConsecScaled), scale(tfDist_l1ConsecScaled))),
y = scale(tfDist_euclidConsecScaled) - scale(tfDist_l1ConsecScaled),
xlab = "Average normalized distance",
ylab = "Difference in normalized distance",
main = "Euclidean versus L1 distance")
abline(h=0, lty=2, col="red")
# Euclidean scaled vs unscaled
plot(tfDist_euclidConsecScaled+1, tfDist_euclidConsec+1, log="xy")
# L1 scaled vs unscaled
plot(tfDist_l1ConsecScaled+1, tfDist_l1Consec+1, log="xy")
# plot top TFs for scaled Euclidean
pi_ti_poisson <- sweep(Y_ti_poisson,2,colSums(Y_ti_poisson),"/")
head(sort(tfDist_euclidConsecScaled, decreasing=TRUE), 20)
## Hdac2 Fos Rfx3 Ezh2 Egr1 Junb Sox11 E2f1
## 296.05713 217.41799 179.15074 122.98668 95.67083 90.23887 79.25346 75.06815
## Jund Hes6 Srebf1 Xbp1 Foxa1 Cebpg Eomes Creb3
## 58.19772 55.42522 54.24402 52.11109 48.72060 48.02092 47.51331 46.56091
## Zbtb7b Cebpb Ybx1 Rcor1
## 45.79742 41.70045 39.62964 36.28025
rafalib::mypar(mfrow=c(3,3))
for(tt in 1:9){
boxplot(pi_ti_poisson[names(tfDist_euclidConsecScaled)[order(tfDist_euclidConsecScaled, decreasing=TRUE)[tt]], ] ~ pt1Groups, las=2,
main=paste0(names(tfDist_euclidConsecScaled)[order(tfDist_euclidConsecScaled, decreasing=TRUE)[tt]]," activity"),
ylab="Contribution to cell gene expression",
xaxt = "n")
}
## of all distances, how much do they explain? 42%
ooEuclidScaled <- order(tfDist_euclidConsecScaled, decreasing=TRUE)
sum(tfDist_euclidConsecScaled[ooEuclidScaled[1:9]]) / sum(tfDist_euclidConsecScaled)
## [1] 0.4188502
# plot top TFs for scaled L1
head(sort(tfDist_l1ConsecScaled, decreasing=TRUE), 20)
## Fos Egr1 Bhlhe40 Rfx3 Atf3 Hdac2 Ezh2 Klf5
## 4580.750 3362.876 3292.897 3070.636 3026.885 2850.889 2720.771 2454.328
## Cebpb Klf4 Zfp467 Jun E2f1 Xbp1 Srebf1 E2f8
## 2225.967 2136.757 2071.616 2021.944 1967.249 1834.015 1551.442 1389.432
## Bcl3 Ets1 Egr2 Lmo2
## 1382.979 1349.842 1327.616 1250.132
rafalib::mypar(mfrow=c(3,3))
for(tt in 1:9){
boxplot(pi_ti_poisson[names(tfDist_l1ConsecScaled)[order(tfDist_l1ConsecScaled, decreasing=TRUE)[tt]], ] ~ pt1Groups, las=2,
main=paste0(names(tfDist_l1ConsecScaled)[order(tfDist_l1ConsecScaled, decreasing=TRUE)[tt]]," activity"),
ylab="Contribution to cell gene expression",
xaxt = "n")
}
HDAC known to be very important: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4122610/ Egr1 and FOS upregulation upon injury in epithelium: https://journals.physiology.org/doi/full/10.1152/ajpgi.1999.276.2.G322 FOS essential for other epithelia e.g. corneal epithelium https://pubmed.ncbi.nlm.nih.gov/18369693/ FOS is upregulated when basal cells in airway epithelium is perturbed: https://www.biorxiv.org/content/10.1101/451807v1.full.pdf EZH2 protein activity shown to be highest in GBC in this paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6209616/ Atf3 specifically expressed upon injury: https://iovs.arvojournals.org/article.aspx?articleid=2162580 Sox11 is essential for neurogenesis: https://anatomypubs.onlinelibrary.wiley.com/doi/full/10.1002/dvdy.23962
# GO enrichment based on TF distance
library(msigdbr)
library(fgsea)
geneSets <- msigdbr(species = "Mus musculus", category = "C5", subcategory = "BP")
### filter background to only include genes that we assessed.
#geneSets$gene_symbol <- toupper(geneSets$gene_symbol)
geneSets <- geneSets[geneSets$gene_symbol %in% tf,]
m_list <- geneSets %>% split(x = .$gene_symbol, f = .$gs_name)
#stats <- assocRes$waldStat_lineage1_conditionMock
stats <- tfDist_euclidConsecScaled
eaRes <- fgsea(pathways = m_list, stats = stats, nperm = 5e4, minSize = 10)
## Warning in fgsea(pathways = m_list, stats = stats, nperm = 50000, minSize =
## 10): You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
eaRes[order(eaRes$NES, decreasing=TRUE),]
## pathway pval
## 1: GOBP_POSITIVE_REGULATION_OF_CELL_DEVELOPMENT 0.001645496
## 2: GOBP_REGULATION_OF_GLIOGENESIS 0.004814278
## 3: GOBP_GLIAL_CELL_DIFFERENTIATION 0.006486182
## 4: GOBP_POSITIVE_REGULATION_OF_NERVOUS_SYSTEM_DEVELOPMENT 0.009004109
## 5: GOBP_POSITIVE_REGULATION_OF_NEUROGENESIS 0.009004109
## ---
## 471: GOBP_CELLULAR_COMPONENT_MORPHOGENESIS 0.951979612
## 472: GOBP_KIDNEY_EPITHELIUM_DEVELOPMENT 0.940850839
## 473: GOBP_REGULATION_OF_CELLULAR_COMPONENT_BIOGENESIS 0.967812494
## 474: GOBP_REGULATION_OF_GROWTH 0.990195098
## 475: GOBP_REGULATION_OF_DEVELOPMENTAL_GROWTH 0.996273467
## padj ES NES nMoreExtreme size
## 1: 0.1485118 0.9430054 1.3766714 81 14
## 2: 0.2042500 0.9384836 1.3646344 238 12
## 3: 0.2112980 0.9325994 1.3560784 321 12
## 4: 0.2112980 0.9246746 1.3445550 446 12
## 5: 0.2112980 0.9246746 1.3445550 446 12
## ---
## 471: 0.9600644 0.4810673 0.7022988 47439 14
## 472: 0.9575818 0.4759816 0.6885823 46398 10
## 473: 0.9739638 0.4565839 0.6665562 48228 14
## 474: 0.9922841 0.4272648 0.6282303 49484 20
## 475: 0.9962735 0.3526865 0.5128360 49458 12
## leadingEdge
## 1: Hdac2,Rfx3,Sox11,E2f1
## 2: Hdac2,Ezh2,Sox11,E2f1
## 3: Hdac2,Sox11
## 4: Hdac2,Sox11,E2f1,Myc
## 5: Hdac2,Sox11,E2f1,Myc
## ---
## 471: Lhx2,Isl1,Cux1,Klf7,Emx1
## 472: Myc,Rarb,Foxc1,Six1,Smad4
## 473: Isl1,Klf5,Hsf1,Brca1,Hcfc1,Six1,...
## 474: Msx1,Smarca4,Foxc1,Hey2,Ar,Six1,...
## 475: Foxc1,Hey2,Ar,Six1,Yy1,Srf,...
library(glmGamPoi)
countsTF <- countsAll[colnames(X), ][,neurCells][,oot1]
fit <- glm_gp(countsTF, U)
L <- matrix(0, nrow=ncol(fit$Beta), ncol=19)
rownames(L) <- colnames(fit$Beta)
L[cbind(2:20, 1:19)] <- 1
L[cbind(1:19, 1:19)] <- -1
deRes <- test_de(fit, contrast=L)
# correlation with f test statistic
plot(x = tfDist_euclidConsecScaled[deRes$name] + 1,
y = deRes$f_statistic + 1,
log = "xy",
xlab="TF Distance", ylab="F-test statistic")
library(glmGamPoi)
fit_Yti <- glm_gp(Y_ti_poisson, U)
L <- matrix(0, nrow=ncol(fit_Yti$Beta), ncol=19)
rownames(L) <- colnames(fit_Yti$Beta)
L[cbind(2:20, 1:19)] <- 1
L[cbind(1:19, 1:19)] <- -1
deRes_Yti <- test_de(fit_Yti, contrast=L)
# correlation with f test statistic
plot(x = tfDist_euclidConsecScaled[deRes_Yti$name] + 1,
y = deRes_Yti$f_statistic + 1,
log = "xy",
xlab="TF Distance", ylab="F-test statistic")
alpha <- X
constructViperRegulon <- function(X, alpha){
tfAll <- unlist(mapply(rep, colnames(X), each=colSums(abs(X))))
targetAll <- rownames(X)[unlist(apply(abs(X),2, function(x) which(x > 0)))]
mor <- X[abs(X)>0]
alphaAll <- alpha[abs(X)>0] / max(alpha)
dfReg <- data.frame(tf=tfAll,
target=targetAll,
mor=X[abs(X)>0],
likelihood=alphaAll)
dfReg <- dfReg[!duplicated(dfReg),]
dfReg$tf <- as.character(dfReg$tf)
dfReg$target <- as.character(dfReg$target)
regulon <- dorothea:::df2regulon(dfReg)
return(regulon)
}
viperRegulon <- constructViperRegulon(X, alpha)
library(viper)
enrichMat_viper <- viper(counts,
viperRegulon,
nes = TRUE,
method = "scale",
minsize = 2,
eset.filter = F,
adaptive.size = F)
##
## Computing the association scores
## Computing regulons enrichment with aREA
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
naRows <- which(is.nan(rowSums(enrichMat_viper)))
naRows
## named integer(0)
if(length(naRows) > 0){
enrichMat_viper <- enrichMat_viper[-naRows,]
}
library(AUCell)
##
## Attaching package: 'AUCell'
## The following object is masked from 'package:tradeSeq':
##
## plotGeneCount
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ tidyr 1.2.0 ✔ dplyr 1.0.9
## ✔ readr 2.1.2 ✔ stringr 1.4.1
## ✔ purrr 0.3.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::coalesce() masks fastmatch::coalesce()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks Matrix::expand(), S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks arrayhelpers::slice(), IRanges::slice()
## ✖ tidyr::unpack() masks Matrix::unpack()
library(GSEABase)
## Loading required package: annotate
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
##
## The following object is masked from 'package:XML':
##
## addNode
##
## The following object is masked from 'package:stringr':
##
## boundary
constructGenesets <- function(X, alpha){
tfAll <- unlist(mapply(rep, colnames(X), each=colSums(abs(X))))
targetAll <- rownames(X)[unlist(apply(abs(X),2, function(x) which(x > 0)))]
mor <- X[abs(X)>0]
alphaAll <- alpha[abs(X)>0] / max(alpha)
dfReg <- data.frame(tf=tfAll,
target=targetAll,
mor=X[abs(X)>0],
likelihood=alphaAll)
dfReg <- dfReg[!duplicated(dfReg),]
dfReg$tf <- as.character(dfReg$tf)
dfReg$target <- as.character(dfReg$target)
genesets = dfReg %>%
group_by(tf) %>%
summarise(geneset = list(GSEABase::GeneSet(target))) %>%
transmute(tf, geneset2 = pmap(., .f=function(tf, geneset, ...) {
setName(geneset) = tf
return(geneset)
})) %>%
deframe() %>%
GeneSetCollection()
}
genesets <- constructGenesets(X, alpha)
obj <- AUCell_buildRankings(t(scale(t(counts))), nCores=1, plotStats = F, verbose = F) %>%
AUCell_calcAUC(genesets, ., verbose=F)
## Warning in .AUCell_buildRankings(exprMat = exprMat, featureType = featureType,
## : nCores is no longer used. It will be deprecated in the next AUCell version.
resAUCell <- AUCell::getAUC(obj)
enrichMat_AUCell <- resAUCell[colnames(X),]
library(scater)
## Loading required package: scuttle
##
## Attaching package: 'scater'
## The following object is masked from 'package:limma':
##
## plotMDS
set.seed(44)
pcViper <- scater::calculatePCA(x = enrichMat_viper,
ncomponents = 10,
ntop = nrow(enrichMat_viper))
pcAUCell <- scater::calculatePCA(x = enrichMat_AUCell,
ncomponents = 10,
ntop = nrow(enrichMat_AUCell))
pcPois <- scater::calculatePCA(x = log1p(Y_ti_poisson),
ncomponents = 10,
ntop = nrow(Y_ti_poisson))
umapViperFull <- uwot::umap(pcViper, min_dist = 0.8)
umapAUCellFull <- uwot::umap(pcAUCell, min_dist = 0.8)
umapPoisLassoFull <- uwot::umap(pcPois, min_dist = 0.8)
pal <- wesanderson::wes_palette("Zissou1", n=20, type="continuous")
plot(umapViperFull, pch=16, cex=1/2, col=pal[pt1Groups], main="Viper")
plot(umapAUCellFull, pch=16, cex=1/2, col=pal[pt1Groups], main="AUCell")
plot(umapPoisLassoFull, pch=16, cex=1/2, col=pal[pt1Groups], main="Poisson model")
## Graph-based clustering to compare
# library(scran) ; library(bluster)
# clustViper <- clusterRows(umapViperFull, NNGraphParam(cluster.fun="louvain"))
# clustAUCell <- clusterRows(umapAUCellFull, NNGraphParam(cluster.fun="louvain"))
# clustPoisLasso <- clusterRows(umapPoisLassoFull, NNGraphParam(cluster.fun="louvain"))
# mclust::adjustedRandIndex(clustViper, pt1Groups)
# mclust::adjustedRandIndex(clustAUCell, pt1Groups)
# mclust::adjustedRandIndex(clustPoisLasso, pt1Groups)
set.seed(44)
pt1Groups10 <- as.character(pt1Groups)
for(gg in 1:nlevels(pt1Groups)){
curPt1Groups10 <- sample(rep(1:ceiling(table(pt1Groups)[gg] / 10), each=10)[1:table(pt1Groups)[gg]])
pt1Groups10[pt1Groups10 == levels(pt1Groups)[gg]] <- paste0(levels(pt1Groups)[gg],curPt1Groups10)
}
pt1Groups10f <- factor(pt1Groups10)
design10 <- model.matrix(~ 0 + pt1Groups10f)
counts10 <- counts %*% design10
poisLassoRes_design10 <- transfactor::estimateActivity(counts = counts,
X = X,
model = "poisson",
U = design10,
plot = FALSE,
verbose = FALSE,
maxIter = 1000,
epsilon = 1,
sparse = TRUE,
repressions = FALSE)
## Converged.
# converged after iteration 87. Log-lik: -42841940.422
saveRDS(poisLassoRes_design10, file="../data/230621_poisLassoRes_repr_design10Cells.rds")
alpha <- X
constructViperRegulon <- function(X, alpha){
tfAll <- unlist(mapply(rep, colnames(X), each=colSums(abs(X))))
targetAll <- rownames(X)[unlist(apply(abs(X),2, function(x) which(x > 0)))]
mor <- X[abs(X)>0]
alphaAll <- alpha[abs(X)>0] / max(alpha)
dfReg <- data.frame(tf=tfAll,
target=targetAll,
mor=X[abs(X)>0],
likelihood=alphaAll)
dfReg <- dfReg[!duplicated(dfReg),]
dfReg$tf <- as.character(dfReg$tf)
dfReg$target <- as.character(dfReg$target)
regulon <- dorothea:::df2regulon(dfReg)
return(regulon)
}
viperRegulon <- constructViperRegulon(X, alpha)
library(viper)
enrichMat_viper <- viper(counts10,
viperRegulon,
nes = TRUE,
method = "scale",
minsize = 2,
eset.filter = F,
adaptive.size = F)
##
## Computing the association scores
## Computing regulons enrichment with aREA
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
naRows <- which(is.nan(rowSums(enrichMat_viper)))
naRows
## named integer(0)
if(length(naRows) > 0){
enrichMat_viper <- enrichMat_viper[-naRows,]
}
library(AUCell)
library(tidyverse)
library(GSEABase)
constructGenesets <- function(X, alpha){
tfAll <- unlist(mapply(rep, colnames(X), each=colSums(abs(X))))
targetAll <- rownames(X)[unlist(apply(abs(X),2, function(x) which(x > 0)))]
mor <- X[abs(X)>0]
alphaAll <- alpha[abs(X)>0] / max(alpha)
dfReg <- data.frame(tf=tfAll,
target=targetAll,
mor=X[abs(X)>0],
likelihood=alphaAll)
dfReg <- dfReg[!duplicated(dfReg),]
dfReg$tf <- as.character(dfReg$tf)
dfReg$target <- as.character(dfReg$target)
genesets = dfReg %>%
group_by(tf) %>%
summarise(geneset = list(GSEABase::GeneSet(target))) %>%
transmute(tf, geneset2 = pmap(., .f=function(tf, geneset, ...) {
setName(geneset) = tf
return(geneset)
})) %>%
deframe() %>%
GeneSetCollection()
}
genesets <- constructGenesets(X, alpha)
obj <- AUCell_buildRankings(t(scale(t(counts10))), nCores=1, plotStats = F, verbose = F) %>%
AUCell_calcAUC(genesets, ., verbose=F)
## Warning in .AUCell_buildRankings(exprMat = exprMat, featureType = featureType,
## : nCores is no longer used. It will be deprecated in the next AUCell version.
resAUCell <- AUCell::getAUC(obj)
enrichMat_AUCell <- resAUCell[colnames(X),]
poisLassoRes_design10 <- readRDS("../data/230621_poisLassoRes_repr_design10Cells.rds")
library(scater)
set.seed(44)
pcViper <- scater::calculatePCA(x = enrichMat_viper,
ncomponents = 10,
ntop = nrow(enrichMat_viper))
pcAUCell <- scater::calculatePCA(x = enrichMat_AUCell,
ncomponents = 10,
ntop = nrow(enrichMat_AUCell))
pcPois <- scater::calculatePCA(x = log1p(poisLassoRes_design10$mu_tc),
ncomponents = 10,
ntop = nrow(poisLassoRes_design10$mu_tc))
umapViper <- uwot::umap(pcViper, min_dist = 0.8)
umapAUCell <- uwot::umap(pcAUCell, min_dist = 0.8)
umapPoisLasso <- uwot::umap(pcPois, min_dist = 0.8)
pal <- wesanderson::wes_palette("Zissou1", n=20, type="continuous")
plot(umapViper, pch=16, cex=1/2, col=pal[pt1Groups], main="Viper")
plot(umapAUCell, pch=16, cex=1/2, col=pal[pt1Groups], main="AUCell")
plot(umapPoisLasso, pch=16, cex=1/2, col=pal[pt1Groups], main="Poisson model")
poisRes <- transfactor::estimateActivity(counts=as.matrix(counts),
model = "poisson",
X=X,
U=U,
maxIter=1500,
plot=TRUE,
verbose=TRUE,
epsilon=1/2,
iterOLS = 0,
sparse = FALSE,
repressions = FALSE)
## iteration 1
## iteration 2. Log-lik: -13396647.339
## iteration 3. Log-lik: -12700489.376
## iteration 4. Log-lik: -12284307.455
## iteration 5. Log-lik: -12030788.411
## iteration 6. Log-lik: -11864398.815
## iteration 7. Log-lik: -11752410.015
## iteration 8. Log-lik: -11674510.096
## iteration 9. Log-lik: -11618506.945
## iteration 10. Log-lik: -11578236.107
## iteration 11. Log-lik: -11549520.683
## iteration 12. Log-lik: -11527618.469
## iteration 13. Log-lik: -11511779.407
## iteration 14. Log-lik: -11499660.917
## iteration 15. Log-lik: -11490704.006
## iteration 16. Log-lik: -11483308.539
## iteration 17. Log-lik: -11477496.051
## iteration 18. Log-lik: -11472496.447
## iteration 19. Log-lik: -11469119.494
## iteration 20. Log-lik: -11465860.143
## iteration 21. Log-lik: -11462927.361
## iteration 22. Log-lik: -11460802.967
## iteration 23. Log-lik: -11459236.578
## iteration 24. Log-lik: -11457848.292
## iteration 25. Log-lik: -11456647.873
## iteration 26. Log-lik: -11455558.379
## iteration 27. Log-lik: -11454619.318
## iteration 28. Log-lik: -11453744.565
## iteration 29. Log-lik: -11453121.161
## iteration 30. Log-lik: -11452486.78
## iteration 31. Log-lik: -11451923.756
## iteration 32. Log-lik: -11451463.494
## iteration 33. Log-lik: -11450934.151
## iteration 34. Log-lik: -11450543.272
## iteration 35. Log-lik: -11450301.99
## iteration 36. Log-lik: -11449984.093
## iteration 37. Log-lik: -11449739.713
## iteration 38. Log-lik: -11449508.738
## iteration 39. Log-lik: -11449346.257
## iteration 40. Log-lik: -11449130.304
## iteration 41. Log-lik: -11448983.599
## iteration 42. Log-lik: -11448779.184
## iteration 43. Log-lik: -11448600.221
## iteration 44. Log-lik: -11448449.283
## iteration 45. Log-lik: -11448314.514
## iteration 46. Log-lik: -11448167.924
## iteration 47. Log-lik: -11448039.436
## iteration 48. Log-lik: -11447966.219
## iteration 49. Log-lik: -11447815.298
## iteration 50. Log-lik: -11447673.663
## iteration 51. Log-lik: -11447543.481
## iteration 52. Log-lik: -11447430.409
## iteration 53. Log-lik: -11447319.217
## iteration 54. Log-lik: -11447205.959
## iteration 55. Log-lik: -11447105.444
## iteration 56. Log-lik: -11446978.526
## iteration 57. Log-lik: -11446909.879
## iteration 58. Log-lik: -11446836.306
## iteration 59. Log-lik: -11446774.464
## iteration 60. Log-lik: -11446689.053
## iteration 61. Log-lik: -11446620.06
## iteration 62. Log-lik: -11446574.701
## iteration 63. Log-lik: -11446523.471
## iteration 64. Log-lik: -11446460.14
## iteration 65. Log-lik: -11446374.229
## iteration 66. Log-lik: -11446305.941
## iteration 67. Log-lik: -11446257.058
## iteration 68. Log-lik: -11446198.871
## iteration 69. Log-lik: -11446162.398
## iteration 70. Log-lik: -11446152.326
## iteration 71. Log-lik: -11446073.932
## iteration 72. Log-lik: -11446020.039
## iteration 73. Log-lik: -11445961.155
## iteration 74. Log-lik: -11445933.084
## iteration 75. Log-lik: -11445902.881
## iteration 76. Log-lik: -11445883.455
## iteration 77. Log-lik: -11445839.627
## iteration 78. Log-lik: -11445807.91
## iteration 79. Log-lik: -11445739.227
## iteration 80. Log-lik: -11445689.703
## iteration 81. Log-lik: -11445655.86
## iteration 82. Log-lik: -11445587.809
## iteration 83. Log-lik: -11445555.751
## iteration 84. Log-lik: -11445526.856
## iteration 85. Log-lik: -11445481.638
## iteration 86. Log-lik: -11445468.552
## iteration 87. Log-lik: -11445435.837
## iteration 88. Log-lik: -11445425.284
## iteration 89. Log-lik: -11445392.503
## iteration 90. Log-lik: -11445371.145
## iteration 91. Log-lik: -11445326.757
## iteration 92. Log-lik: -11445280.704
## iteration 93. Log-lik: -11445260.159
## iteration 94. Log-lik: -11445233.757
## iteration 95. Log-lik: -11445199.269
## iteration 96. Log-lik: -11445188.839
## iteration 97. Log-lik: -11445168.627
## iteration 98. Log-lik: -11445135.614
## iteration 99. Log-lik: -11445140.46
## iteration 100. Log-lik: -11445112.766
## iteration 101. Log-lik: -11445064.217
## iteration 102. Log-lik: -11445035.753
## iteration 103. Log-lik: -11445009.115
## iteration 104. Log-lik: -11444999.576
## iteration 105. Log-lik: -11444995.923
## iteration 106. Log-lik: -11444965.32
## iteration 107. Log-lik: -11444946.643
## iteration 108. Log-lik: -11444931.35
## iteration 109. Log-lik: -11444925.169
## iteration 110. Log-lik: -11444915.509
## iteration 111. Log-lik: -11444888.29
## iteration 112. Log-lik: -11444867.403
## iteration 113. Log-lik: -11444852.042
## iteration 114. Log-lik: -11444836.956
## iteration 115. Log-lik: -11444826.982
## iteration 116. Log-lik: -11444805.473
## iteration 117. Log-lik: -11444801.622
## iteration 118. Log-lik: -11444792.357
## iteration 119. Log-lik: -11444774.622
## iteration 120. Log-lik: -11444750.735
## iteration 121. Log-lik: -11444714.222
## iteration 122. Log-lik: -11444697.843
## iteration 123. Log-lik: -11444693.727
## iteration 124. Log-lik: -11444688.483
## iteration 125. Log-lik: -11444664.174
## iteration 126. Log-lik: -11444666.056
## iteration 127. Log-lik: -11444661.4
## iteration 128. Log-lik: -11444650.605
## iteration 129. Log-lik: -11444646.309
## iteration 130. Log-lik: -11444636.734
## iteration 131. Log-lik: -11444630.251
## iteration 132. Log-lik: -11444619.538
## iteration 133. Log-lik: -11444603.832
## iteration 134. Log-lik: -11444599.41
## iteration 135. Log-lik: -11444576.5
## iteration 136. Log-lik: -11444571.582
## iteration 137. Log-lik: -11444548.387
## iteration 138. Log-lik: -11444530.745
## iteration 139. Log-lik: -11444519.636
## iteration 140. Log-lik: -11444521.066
## iteration 141. Log-lik: -11444509.938
## iteration 142. Log-lik: -11444479.831
## iteration 143. Log-lik: -11444474.614
## iteration 144. Log-lik: -11444469.337
## iteration 145. Log-lik: -11444464.701
## iteration 146. Log-lik: -11444465.89
## iteration 147. Log-lik: -11444467.07
## iteration 148. Log-lik: -11444462.256
## iteration 149. Log-lik: -11444450.86
## iteration 150. Log-lik: -11444440.042
## iteration 151. Log-lik: -11444415.819
## iteration 152. Log-lik: -11444416.458
## iteration 153. Log-lik: -11444405.984
## iteration 154. Log-lik: -11444393.84
## iteration 155. Log-lik: -11444394.884
## iteration 156. Log-lik: -11444389.38
## iteration 157. Log-lik: -11444377.779
## iteration 158. Log-lik: -11444378.771
## iteration 159. Log-lik: -11444379.719
## iteration 160. Log-lik: -11444374.76
## iteration 161. Log-lik: -11444369.612
## iteration 162. Log-lik: -11444370.568
## iteration 163. Log-lik: -11444347.394
## iteration 164. Log-lik: -11444335.645
## iteration 165. Log-lik: -11444324.562
## iteration 166. Log-lik: -11444323.666
## iteration 167. Log-lik: -11444324.514
## iteration 168. Log-lik: -11444325.356
## iteration 169. Log-lik: -11444326.192
## iteration 170. Log-lik: -11444327.022
## iteration 171. Log-lik: -11444327.847
## iteration 172. Log-lik: -11444322.547
## iteration 173. Log-lik: -11444319.878
## iteration 174. Log-lik: -11444314.61
## iteration 175. Log-lik: -11444308.908
## iteration 176. Log-lik: -11444303.783
## iteration 177. Log-lik: -11444298.31
## iteration 178. Log-lik: -11444280.528
## iteration 179. Log-lik: -11444268.995
## iteration 180. Log-lik: -11444263.185
## iteration 181. Log-lik: -11444263.88
## iteration 182. Log-lik: -11444270.485
## iteration 183. Log-lik: -11444265.253
## iteration 184. Log-lik: -11444265.929
## iteration 185. Log-lik: -11444255.011
## iteration 186. Log-lik: -11444255.676
## iteration 187. Log-lik: -11444256.337
## iteration 188. Log-lik: -11444256.993
## iteration 189. Log-lik: -11444245.102
## iteration 190. Log-lik: -11444245.739
## iteration 191. Log-lik: -11444234.294
## iteration 192. Log-lik: -11444234.899
## iteration 193. Log-lik: -11444223.408
## iteration 194. Log-lik: -11444223.983
## iteration 195. Log-lik: -11444224.554
## iteration 196. Log-lik: -11444219.004
## iteration 197. Log-lik: -11444206.506
## iteration 198. Log-lik: -11444194.569
## iteration 199. Log-lik: -11444195.084
## iteration 200. Log-lik: -11444189.071
## iteration 201. Log-lik: -11444189.574
## iteration 202. Log-lik: -11444188.316
## iteration 203. Log-lik: -11444188.824
## iteration 204. Log-lik: -11444182.797
## Converged.
tfDist_euclidConsecScaled_poisson <- transfactor::tfDistance(activity = poisRes,
X = X,
counts = counts,
U = U,
distance = "Euclidean",
contrast = "consecutive",
scaleDistance = TRUE)
# TF ranking
plot(x=tfDist_euclidConsecScaled+1, y=tfDist_euclidConsecScaled_poisson+1, log="xy",
xlab="TF distance: sparse initialization", ylab="TF distance: no sparse initialization")
oo1 <- order(tfDist_euclidConsecScaled, decreasing=TRUE)
oo2 <- order(tfDist_euclidConsecScaled_poisson, decreasing=TRUE)
head(cbind(oo1, oo2), 9) #all 9 still in top.
## oo1 oo2
## [1,] 89 89
## [2,] 63 63
## [3,] 183 183
## [4,] 61 61
## [5,] 45 45
## [6,] 107 107
## [7,] 196 196
## [8,] 37 37
## [9,] 108 108
# Dim red
Y_ti_poisson_noSparse <- transfactor::tfCounts(mu_gtc=poisRes$mu_gtc, counts=counts, design=U)
pcPois <- scater::calculatePCA(x = log1p(Y_ti_poisson_noSparse),
ncomponents = 10,
ntop = nrow(Y_ti_poisson_noSparse))
umapPois <- uwot::umap(pcPois, min_dist = 0.8)
plot(umapPois, pch=16, cex=1/2, col=pal[pt1Groups], main="Poisson model")
set.seed(2)
discPal <- c("#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E", "#E6AB02", "#A6761D", "gray40")
pcCounts <- scater::calculatePCA(x = log1p(counts),
ncomponents = 10,
ntop = 1e3)
umapCounts <- uwot::umap(pcCounts, min_dist = 0.8)
dfUMAPCounts <- data.frame(UMAP1=umapCounts[,1],
UMAP2=umapCounts[,2],
cellType=droplevels(ct1))
pTraj <- ggplot(dfUMAPCounts, aes(x=UMAP1, y=UMAP2, color=cellType)) +
geom_point(size=.2) +
scale_color_manual(values=discPal) +
theme_classic()
pTraj <- cowplot::plot_grid(pTraj, labels="a")
pTraj
ggsave("../plots/neuronalLineage.pdf")
## Saving 7 x 5 in image
wesanderson::wes_palette("Darjeeling1", 3)
annColors <- list(cellType=c('HBC*' = "#7570B3",
'GBC' = "#1B9E77",
'iOSN' = "#E7298A",
'mOSN' = "#66A61E"),
cluster=c('1' = "#FF0000",
'2' = "#00A08A",
'3' = "#F2AD00"))
phm <- pheatmap(yhatScaled, cluster_cols = FALSE,
show_colnames = FALSE, show_rownames = FALSE,
border_color = NA, annotation_row = anro,
annotation_names_row = TRUE, annotation_legend = TRUE,
annotation_col = ancol, legend = FALSE,
annotation_colors = annColors)
ooEuclidScaled <- order(tfDist_euclidConsecScaled, decreasing=TRUE)
topTFs <- names(tfDist_euclidConsecScaled[ooEuclidScaled[1:9]])
pTFList <- list()
for(tt in 1:9){
curTF <- topTFs[tt]
curDF <- data.frame(bin=pt1Groups,
contribution=pi_ti_poisson[curTF,])
pTFList[[tt]] <- ggplot(curDF, aes(x=bin, y=contribution, color=bin)) +
geom_boxplot() +
theme_bw() +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(pt1Groups), "continuous")) +
theme(axis.text.x = element_blank(),
legend.position = "none",
axis.title = element_text(size=5),
axis.text.y = element_text(size=5),
title = element_text(size=8)) +
xlab("Pseudotime") +
ylab("Contribution") +
ggtitle(curTF)
}
pTF <- cowplot::plot_grid(plotlist=pTFList, labels=c("c"))
pTF
ggsave("../plots/topTFsPoissonLasso.pdf")
## Saving 7 x 5 in image
ooEuclidScaled <- order(tfDist_euclidConsecScaled, decreasing=TRUE)
topTFs <- names(tfDist_euclidConsecScaled[ooEuclidScaled[1:length(tfDist_euclidConsecScaled)]])
pdf("../plots/allTFPlots.pdf")
for(tt in 1:length(tfDist_euclidConsecScaled)){
curTF <- topTFs[tt]
curDF <- data.frame(bin=pt1Groups,
contribution=pi_ti_poisson[curTF,])
print(ggplot(curDF, aes(x=bin, y=contribution, color=bin)) +
geom_boxplot() +
theme_bw() +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(pt1Groups), "continuous")) +
theme(axis.text.x = element_blank(),
legend.position = "none",
axis.title = element_text(size=5),
axis.text.y = element_text(size=5),
title = element_text(size=8)) +
xlab("Pseudotime") +
ylab("Contribution") +
ggtitle(paste0(curTF,": distance = ",round(tfDist_euclidConsecScaled[ooEuclidScaled[tt]], 3))))
}
dev.off()
## quartz_off_screen
## 2
plotUMAP <- function(dr, pt1Groups){
df <- data.frame(UMAP1=dr[,1],
UMAP2=dr[,2],
ptg=pt1Groups)
pal <- wesanderson::wes_palette("Zissou1", nlevels(pt1Groups), "continuous")
ggplot(df, aes(x=UMAP1, y=UMAP2, color=ptg)) +
geom_point(size=.2) +
scale_color_manual(values=pal) +
theme_classic() +
theme(legend.position = "none")
}
pViper <- plotUMAP(umapViperFull, pt1Groups) + ggtitle("viper")
pAUCell <- plotUMAP(umapAUCellFull, pt1Groups) + ggtitle("AUCell")
pPoisLasso <- plotUMAP(umapPoisLassoFull, pt1Groups) + ggtitle("Poisson lasso")
pPois <- plotUMAP(umapPois, pt1Groups) + ggtitle("Poisson")
pDR <- cowplot::plot_grid(pViper, pAUCell, pPoisLasso, pPois, labels="d")
pDR
pComp <- gridExtra::grid.arrange(pTraj, pTF, phm[[4]], pDR)
ggsave(file="../plots/oeComposite.pdf", pComp, width=11, height=12)